#!/usr/bin/bash

# Environment setup for the DeepSpeed training launch at the bottom of this
# script.
#
# Strict mode (set -euo pipefail) is intentionally NOT enabled: login profiles
# and conda activation hooks commonly reference unset variables or end on a
# harmless non-zero status. The steps that matter are checked explicitly.

# Load the login profile. Its exit status is deliberately not checked because
# `source` returns the status of the last command the profile happens to run.
# NOTE(review): presumably this is what provides the `conda` shell function
# and exports OUTPUTS / DATASETS / CKPTS / PROJECTS -- confirm.
# shellcheck source=/dev/null
source "$HOME/.bash_profile"

# The active MODEL_PATH / OUTPUT_DIR are built from OUTPUTS and the active
# DATA_PATH / IMAGE_FOLDER from DATASETS. Abort with a clear message instead of
# letting an unset variable silently turn those into paths under "/".
: "${OUTPUTS:?OUTPUTS must be set (MODEL_PATH and OUTPUT_DIR are derived from it)}"
: "${DATASETS:?DATASETS must be set (DATA_PATH and IMAGE_FOLDER are derived from it)}"

# Without the intended environment the launch would run with whatever
# interpreter and packages happen to be on PATH, so treat failure as fatal.
conda activate lxb39 || {
  printf 'error: could not activate conda environment "lxb39"\n' >&2
  exit 1
}

# The training entry point is referenced by a path relative to the repository
# root, so a failed cd must stop the script.
cd ~/projects/LLaVA || {
  printf 'error: could not change directory to ~/projects/LLaVA\n' >&2
  exit 1
}
# ---------------------------------------------------------------------------
# Run configuration.
#
# One assignment per setting is active; the commented-out lines are alternative
# values kept for quick switching.
# NOTE(review): OUTPUTS, DATASETS, CKPTS and PROJECTS are not defined in this
# file -- presumably they come from the sourced profile. An unset one makes
# the derived path start at "/".
# ---------------------------------------------------------------------------

# Passed to the trainer as --model_name_or_path.
# MODEL_PATH="${CKPTS}/mamba-vl-790m-hf"
# MODEL_PATH="${OUTPUTS}/LLaVA/mamba-vl-790m-1st-projector-mamba-epochs-1-lr-projector-1e-3-mamba-6e-4-extra-wd-0.0"
# MODEL_PATH="${OUTPUTS}/LLaVA/mamba-vl-790m-2nd-all-epochs-2-lr-1e-4-res-336-no-llrd-no-neftune"
MODEL_PATH="${OUTPUTS}/LLaVA/mamba-vl-790m-2nd-all-epochs-4-lr-1e-4-res-672-chinese"

# Passed as --data_path.
# DATA_PATH="${DATASETS}/LLaVA-Instruct-150K/llava_v1_5_mix665k_coco_karpathy_val_deduplication.json"
# DATA_PATH="${DATASETS}/LLaVA-Instruct-150K/llava_v1_5_mix665k_coco_karpathy_test_deduplication.json"
# DATA_PATH="${DATASETS}/LLaVA-Instruct-150K/chinese_llava_v1_5_mix665k.json"
# DATA_PATH="${DATASETS}/coco_karpathy_split/annotations/coco_karpathy_train_gt_for_mpt_conv_llava.json"
# DATA_PATH="${DATASETS}/sciencedb/bamboo_slips/train_annotations.json"
DATA_PATH="${DATASETS}/sciencedb/ceramics/train_annotations.json"

# Passed as --image_folder.
# IMAGE_FOLDER="${DATASETS}/coco_karpathy_split/images/train"
IMAGE_FOLDER="${DATASETS}"

# Passed as --vision_tower; the active value lives inside MODEL_PATH, so it
# must be assigned after MODEL_PATH.
# VISION_TOWER="${CKPTS}/clip-vit-large-patch14-336"
# VISION_TOWER="${CKPTS}/clip-vit-large-patch14-672"
# VISION_TOWER="${MODEL_PATH}/clip-vit-large-patch14-336"
VISION_TOWER="${MODEL_PATH}/clip-vit-large-patch14-672"

# Passed as --pretrain_mm_mlp_adapter. This is the literal string "None".
# NOTE(review): confirm the training script interprets it as "no adapter".
PRETRAIN_MM_MLP_ADAPTER="None"

# Passed as --coco_caption_prompt_file.
COCO_CAPTION_PROMPT_FILE="${PROJECTS}/LLaVA/coco_caption_prompt.txt"

# Passed as --output_dir.
OUTPUT_DIR="${OUTPUTS}/LLaVA/mamba-vl-790m-3rd-all-epochs-8-lr-5e-5-res-672-chinese-ceramics-2nd-4-epochs"


# Launch llava/train/train_mem.py through the DeepSpeed launcher, using master
# port 23333 and restricted via --include to localhost devices 0,1,2,3.
#
# The trainer arguments are collected in an array so that every variable
# expansion stays quoted: an empty value can no longer cause the next flag to
# be consumed as its argument, and a path containing whitespace or glob
# characters is passed through as a single word. Flags, values and their order
# are otherwise unchanged.
train_args=(
  # Model and data inputs.
  --model_name_or_path "$MODEL_PATH"
  --version mpt
  --data_path "$DATA_PATH"
  --image_folder "$IMAGE_FOLDER"

  # Vision tower and multimodal projector.
  --vision_tower "$VISION_TOWER"
  --pretrain_mm_mlp_adapter "$PRETRAIN_MM_MLP_ADAPTER"
  --mm_projector_type mlp2x_gelu
  --tune_mm_mlp_adapter False
  --tune_vision_tower True
  --vision_tower_lr 5e-5
  --vision_tower_lldr 0.9

  # COCO caption SFT is switched off; the prompt file is still passed.
  --coco_caption_sft False
  --coco_caption_prompt_file "$COCO_CAPTION_PROMPT_FILE"

  --neftune_alpha 1
  --mm_vision_select_layer -2
  --mm_use_im_start_end False
  --mm_use_im_patch_token False
  --image_aspect_ratio pad
  --group_by_modality_length True

  # Numeric precision flags: fp16 and bf16 off, tf32 on (set further below).
  --fp16 False
  --bf16 False

  # Output location, epochs and batch sizes.
  # NOTE(review): presumably 4 devices x 8 per device x 4 accumulation steps
  # = 128 samples per optimizer step -- confirm against the trainer.
  --output_dir "$OUTPUT_DIR"
  --num_train_epochs 8
  --per_device_train_batch_size 8
  --per_device_eval_batch_size 4
  --gradient_accumulation_steps 4

  # Evaluation and checkpointing.
  --evaluation_strategy no
  --save_strategy steps
  --save_steps 2000
  --save_total_limit 1

  # Learning rates, weight decay and schedule.
  --mm_projector_lr 5e-5
  --mm_projector_wd 0.0
  --learning_rate 5e-5
  --weight_decay 0.0
  --warmup_ratio 0.03
  --lr_scheduler_type cosine

  # Logging, memory and data loading.
  --logging_steps 1
  --tf32 True
  --model_max_length 4096
  --gradient_checkpointing True
  --dataloader_num_workers 4
  --lazy_preprocess True
  --report_to wandb
)

# The script path is relative, so this relies on the current directory being
# the LLaVA repository root. As the last command, its exit status becomes the
# script's exit status.
deepspeed --master_port 23333 --include="localhost:0,1,2,3" \
  llava/train/train_mem.py "${train_args[@]}"
